from urllib.request import urlopen
import json
# Download the US-states GeoJSON and parse it into `states`, which the
# choropleth calls further down pass as `geojson=`. The body of the `with`
# must be indented; the context manager closes the HTTP response afterwards.
with urlopen('https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json') as response:
    states = json.load(response)
import pandas as pd
# Load only the three columns this analysis uses from the hate-crime CSV.
df = pd.read_csv(
    "https://raw.githubusercontent.com/visualization-project-group/surprise-map-assignment/master/hate_crime.csv",
    usecols=['STATE_NAME', 'BIAS_DESC', 'DATA_YEAR'],
)
# Rows per (state, year): count() tallies the non-null BIAS_DESC values in
# each group (presumably one CSV row per reported incident -- verify).
df2 = df.groupby(['STATE_NAME', 'DATA_YEAR']).count()
df2.head(50)
| BIAS_DESC | ||
|---|---|---|
| STATE_NAME | DATA_YEAR | |
| Alabama | 1992 | 4 |
| 1993 | 5 | |
| 2002 | 2 | |
| 2003 | 1 | |
| 2004 | 3 | |
| 2007 | 7 | |
| 2008 | 11 | |
| 2009 | 9 | |
| 2010 | 19 | |
| 2011 | 83 | |
| 2012 | 6 | |
| 2013 | 6 | |
| 2014 | 9 | |
| 2015 | 10 | |
| 2016 | 14 | |
| 2017 | 14 | |
| 2018 | 2 | |
| 2019 | 5 | |
| Alaska | 1993 | 24 |
| 1994 | 9 | |
| 1995 | 8 | |
| 1996 | 9 | |
| 1997 | 10 | |
| 1999 | 5 | |
| 2000 | 4 | |
| 2001 | 20 | |
| 2002 | 7 | |
| 2003 | 13 | |
| 2004 | 9 | |
| 2005 | 4 | |
| 2006 | 6 | |
| 2007 | 8 | |
| 2008 | 8 | |
| 2009 | 9 | |
| 2010 | 7 | |
| 2011 | 8 | |
| 2012 | 6 | |
| 2013 | 8 | |
| 2014 | 6 | |
| 2015 | 8 | |
| 2016 | 11 | |
| 2017 | 4 | |
| 2018 | 5 | |
| 2019 | 15 | |
| Arizona | 1991 | 48 |
| 1992 | 172 | |
| 1993 | 208 | |
| 1994 | 205 | |
| 1995 | 220 | |
| 1996 | 250 |
# Tally the rows whose bias description is exactly 'Anti-Asian' per
# (state, year). Assignment aligns on the (STATE_NAME, DATA_YEAR) index, so
# state-years without such rows come out as NaN and are zero-filled below.
anti_asian_rows = df[df['BIAS_DESC'] == 'Anti-Asian']
df2['Anti-Asian-count'] = (
    anti_asian_rows.groupby(['STATE_NAME', 'DATA_YEAR'])['BIAS_DESC'].count()
)
df2 = df2.fillna(0)
# Share of each state-year's rows that are Anti-Asian.
df2['Anti-Asian-rate'] = df2['Anti-Asian-count'].div(df2['BIAS_DESC'])
df2.head(50)
| BIAS_DESC | Anti-Asian-count | Anti-Asian-rate | ||
|---|---|---|---|---|
| STATE_NAME | DATA_YEAR | |||
| Alabama | 1992 | 4 | 0.0 | 0.000000 |
| 1993 | 5 | 0.0 | 0.000000 | |
| 2002 | 2 | 0.0 | 0.000000 | |
| 2003 | 1 | 0.0 | 0.000000 | |
| 2004 | 3 | 0.0 | 0.000000 | |
| 2007 | 7 | 1.0 | 0.142857 | |
| 2008 | 11 | 0.0 | 0.000000 | |
| 2009 | 9 | 0.0 | 0.000000 | |
| 2010 | 19 | 0.0 | 0.000000 | |
| 2011 | 83 | 1.0 | 0.012048 | |
| 2012 | 6 | 0.0 | 0.000000 | |
| 2013 | 6 | 0.0 | 0.000000 | |
| 2014 | 9 | 1.0 | 0.111111 | |
| 2015 | 10 | 0.0 | 0.000000 | |
| 2016 | 14 | 0.0 | 0.000000 | |
| 2017 | 14 | 0.0 | 0.000000 | |
| 2018 | 2 | 0.0 | 0.000000 | |
| 2019 | 5 | 0.0 | 0.000000 | |
| Alaska | 1993 | 24 | 2.0 | 0.083333 |
| 1994 | 9 | 0.0 | 0.000000 | |
| 1995 | 8 | 1.0 | 0.125000 | |
| 1996 | 9 | 0.0 | 0.000000 | |
| 1997 | 10 | 1.0 | 0.100000 | |
| 1999 | 5 | 0.0 | 0.000000 | |
| 2000 | 4 | 0.0 | 0.000000 | |
| 2001 | 20 | 0.0 | 0.000000 | |
| 2002 | 7 | 0.0 | 0.000000 | |
| 2003 | 13 | 1.0 | 0.076923 | |
| 2004 | 9 | 1.0 | 0.111111 | |
| 2005 | 4 | 0.0 | 0.000000 | |
| 2006 | 6 | 0.0 | 0.000000 | |
| 2007 | 8 | 0.0 | 0.000000 | |
| 2008 | 8 | 1.0 | 0.125000 | |
| 2009 | 9 | 0.0 | 0.000000 | |
| 2010 | 7 | 0.0 | 0.000000 | |
| 2011 | 8 | 0.0 | 0.000000 | |
| 2012 | 6 | 1.0 | 0.166667 | |
| 2013 | 8 | 0.0 | 0.000000 | |
| 2014 | 6 | 0.0 | 0.000000 | |
| 2015 | 8 | 0.0 | 0.000000 | |
| 2016 | 11 | 1.0 | 0.090909 | |
| 2017 | 4 | 0.0 | 0.000000 | |
| 2018 | 5 | 0.0 | 0.000000 | |
| 2019 | 15 | 0.0 | 0.000000 | |
| Arizona | 1991 | 48 | 2.0 | 0.041667 |
| 1992 | 172 | 8.0 | 0.046512 | |
| 1993 | 208 | 9.0 | 0.043269 | |
| 1994 | 205 | 6.0 | 0.029268 | |
| 1995 | 220 | 9.0 | 0.040909 | |
| 1996 | 250 | 5.0 | 0.020000 |
# Baseline ("model") per state: mean of every df2 column over 2009-2018.
# Only state-years that exist in df2 contribute, so a state with no rows in
# some year is averaged over fewer than ten years.
df3 = (
    df2.query('DATA_YEAR >= 2009 and DATA_YEAR <= 2018')
    .groupby('STATE_NAME')
    .mean()
)
df3.head(50)
| BIAS_DESC | Anti-Asian-count | Anti-Asian-rate | |
|---|---|---|---|
| STATE_NAME | |||
| Alabama | 17.2 | 0.200000 | 0.012316 |
| Alaska | 7.2 | 0.200000 | 0.025758 |
| Arizona | 221.6 | 4.000000 | 0.018107 |
| Arkansas | 32.8 | 0.600000 | 0.009146 |
| California | 959.8 | 27.200000 | 0.028152 |
| Colorado | 134.6 | 2.300000 | 0.016700 |
| Connecticut | 131.5 | 3.800000 | 0.031444 |
| Delaware | 18.5 | 0.600000 | 0.030093 |
| District of Columbia | 97.9 | 1.000000 | 0.010958 |
| Federal | 14.0 | 0.000000 | 0.000000 |
| Florida | 118.7 | 1.400000 | 0.011173 |
| Georgia | 31.7 | 0.800000 | 0.027123 |
| Guam | 1.0 | 0.000000 | 0.000000 |
| Hawaii | 44.0 | 6.000000 | 0.136364 |
| Idaho | 32.1 | 0.400000 | 0.012256 |
| Illinois | 98.7 | 1.900000 | 0.017934 |
| Indiana | 75.3 | 1.700000 | 0.026156 |
| Iowa | 12.6 | 0.700000 | 0.055120 |
| Kansas | 74.2 | 1.400000 | 0.018572 |
| Kentucky | 204.2 | 2.900000 | 0.014771 |
| Louisiana | 22.3 | 0.100000 | 0.002500 |
| Maine | 40.2 | 0.500000 | 0.013078 |
| Maryland | 56.6 | 1.200000 | 0.021386 |
| Massachusetts | 362.7 | 12.000000 | 0.033657 |
| Michigan | 389.0 | 8.000000 | 0.021604 |
| Minnesota | 124.0 | 4.000000 | 0.030746 |
| Mississippi | 13.0 | 0.333333 | 0.048990 |
| Missouri | 104.0 | 1.400000 | 0.012809 |
| Montana | 28.8 | 0.000000 | 0.000000 |
| Nebraska | 39.2 | 1.000000 | 0.027177 |
| Nevada | 50.0 | 0.700000 | 0.015617 |
| New Hampshire | 21.7 | 1.100000 | 0.048537 |
| New Jersey | 451.8 | 9.500000 | 0.021666 |
| New Mexico | 19.1 | 0.400000 | 0.021558 |
| New York | 593.9 | 8.100000 | 0.013942 |
| North Carolina | 130.2 | 1.400000 | 0.010333 |
| North Dakota | 25.7 | 0.400000 | 0.011749 |
| Ohio | 346.5 | 3.700000 | 0.011713 |
| Oklahoma | 36.6 | 0.800000 | 0.017184 |
| Oregon | 106.3 | 1.100000 | 0.008745 |
| Pennsylvania | 53.2 | 1.900000 | 0.038109 |
| Rhode Island | 15.8 | 0.300000 | 0.018803 |
| South Carolina | 78.9 | 0.400000 | 0.005007 |
| South Dakota | 25.3 | 0.100000 | 0.004545 |
| Tennessee | 186.0 | 1.500000 | 0.008233 |
| Texas | 192.8 | 4.100000 | 0.021408 |
| Utah | 62.3 | 1.000000 | 0.015070 |
| Vermont | 20.0 | 0.400000 | 0.035049 |
| Virginia | 151.1 | 3.200000 | 0.022477 |
| Washington | 322.6 | 10.700000 | 0.033506 |
# Observed 2019 Anti-Asian rate per state. Only the rate column is kept;
# the raw counts are not needed for the comparison against the baseline.
df4 = (
    df2.query('DATA_YEAR == 2019')
    .groupby('STATE_NAME')[['Anti-Asian-rate']]
    .mean()
)
df4.head(50)
| Anti-Asian-rate | |
|---|---|
| STATE_NAME | |
| Alabama | 0.000000 |
| Alaska | 0.000000 |
| Arizona | 0.004785 |
| Arkansas | 0.029412 |
| California | 0.042157 |
| Colorado | 0.027650 |
| Connecticut | 0.023256 |
| Delaware | 0.000000 |
| District of Columbia | 0.004505 |
| Federal | 0.000000 |
| Florida | 0.017544 |
| Georgia | 0.028037 |
| Hawaii | 0.048780 |
| Idaho | 0.083333 |
| Illinois | 0.015385 |
| Indiana | 0.000000 |
| Iowa | 0.100000 |
| Kansas | 0.000000 |
| Kentucky | 0.013699 |
| Louisiana | 0.000000 |
| Maine | 0.000000 |
| Maryland | 0.000000 |
| Massachusetts | 0.033419 |
| Michigan | 0.013825 |
| Minnesota | 0.000000 |
| Mississippi | 0.111111 |
| Missouri | 0.010526 |
| Montana | 0.027027 |
| Nebraska | 0.000000 |
| Nevada | 0.045455 |
| New Hampshire | 0.062500 |
| New Jersey | 0.029382 |
| New Mexico | 0.000000 |
| New York | 0.009804 |
| North Carolina | 0.016736 |
| North Dakota | 0.055556 |
| Ohio | 0.017857 |
| Oklahoma | 0.000000 |
| Oregon | 0.039773 |
| Pennsylvania | 0.048780 |
| Rhode Island | 0.000000 |
| South Carolina | 0.000000 |
| South Dakota | 0.000000 |
| Tennessee | 0.000000 |
| Texas | 0.012605 |
| Utah | 0.000000 |
| Vermont | 0.000000 |
| Virginia | 0.023810 |
| Washington | 0.044199 |
| West Virginia | 0.018868 |
# Put the observed 2019 rate and the 2009-2018 baseline rate side by side.
# The baseline column is aligned to df4 on the STATE_NAME index.
df4 = df4.rename(columns={"Anti-Asian-rate": "Observed_2019"})
df4['Model_2009-2018'] = df3['Anti-Asian-rate']
df4.head(50)
| Observed_2019 | Model_2009-2018 | |
|---|---|---|
| STATE_NAME | ||
| Alabama | 0.000000 | 0.012316 |
| Alaska | 0.000000 | 0.025758 |
| Arizona | 0.004785 | 0.018107 |
| Arkansas | 0.029412 | 0.009146 |
| California | 0.042157 | 0.028152 |
| Colorado | 0.027650 | 0.016700 |
| Connecticut | 0.023256 | 0.031444 |
| Delaware | 0.000000 | 0.030093 |
| District of Columbia | 0.004505 | 0.010958 |
| Federal | 0.000000 | 0.000000 |
| Florida | 0.017544 | 0.011173 |
| Georgia | 0.028037 | 0.027123 |
| Hawaii | 0.048780 | 0.136364 |
| Idaho | 0.083333 | 0.012256 |
| Illinois | 0.015385 | 0.017934 |
| Indiana | 0.000000 | 0.026156 |
| Iowa | 0.100000 | 0.055120 |
| Kansas | 0.000000 | 0.018572 |
| Kentucky | 0.013699 | 0.014771 |
| Louisiana | 0.000000 | 0.002500 |
| Maine | 0.000000 | 0.013078 |
| Maryland | 0.000000 | 0.021386 |
| Massachusetts | 0.033419 | 0.033657 |
| Michigan | 0.013825 | 0.021604 |
| Minnesota | 0.000000 | 0.030746 |
| Mississippi | 0.111111 | 0.048990 |
| Missouri | 0.010526 | 0.012809 |
| Montana | 0.027027 | 0.000000 |
| Nebraska | 0.000000 | 0.027177 |
| Nevada | 0.045455 | 0.015617 |
| New Hampshire | 0.062500 | 0.048537 |
| New Jersey | 0.029382 | 0.021666 |
| New Mexico | 0.000000 | 0.021558 |
| New York | 0.009804 | 0.013942 |
| North Carolina | 0.016736 | 0.010333 |
| North Dakota | 0.055556 | 0.011749 |
| Ohio | 0.017857 | 0.011713 |
| Oklahoma | 0.000000 | 0.017184 |
| Oregon | 0.039773 | 0.008745 |
| Pennsylvania | 0.048780 | 0.038109 |
| Rhode Island | 0.000000 | 0.018803 |
| South Carolina | 0.000000 | 0.005007 |
| South Dakota | 0.000000 | 0.004545 |
| Tennessee | 0.000000 | 0.008233 |
| Texas | 0.012605 | 0.021408 |
| Utah | 0.000000 | 0.015070 |
| Vermont | 0.000000 | 0.035049 |
| Virginia | 0.023810 | 0.022477 |
| Washington | 0.044199 | 0.033506 |
| West Virginia | 0.018868 | 0.019253 |
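$P(O|M) \approx 1 - |O - E|$, where $O$ is the observed 2019 rate and $E$ is the rate expected under the model $M$ (here, the 2009–2018 mean rate).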
# Likelihood proxy P(O|M): 1 minus the absolute gap between the baseline
# rate and the observed 2019 rate, so a perfect match gives 1.
# (A statement that assigned these two columns to themselves was dropped;
# it had no effect.)
df4['P(O|M)'] = 1 - (df4['Model_2009-2018'] - df4['Observed_2019']).abs()
df4[:50]
| Observed_2019 | Model_2009-2018 | P(O|M) | |
|---|---|---|---|
| STATE_NAME | |||
| Alabama | 0.000000 | 0.012316 | 0.987684 |
| Alaska | 0.000000 | 0.025758 | 0.974242 |
| Arizona | 0.004785 | 0.018107 | 0.986677 |
| Arkansas | 0.029412 | 0.009146 | 0.979734 |
| California | 0.042157 | 0.028152 | 0.985995 |
| Colorado | 0.027650 | 0.016700 | 0.989050 |
| Connecticut | 0.023256 | 0.031444 | 0.991812 |
| Delaware | 0.000000 | 0.030093 | 0.969907 |
| District of Columbia | 0.004505 | 0.010958 | 0.993546 |
| Federal | 0.000000 | 0.000000 | 1.000000 |
| Florida | 0.017544 | 0.011173 | 0.993629 |
| Georgia | 0.028037 | 0.027123 | 0.999086 |
| Hawaii | 0.048780 | 0.136364 | 0.912417 |
| Idaho | 0.083333 | 0.012256 | 0.928923 |
| Illinois | 0.015385 | 0.017934 | 0.997451 |
| Indiana | 0.000000 | 0.026156 | 0.973844 |
| Iowa | 0.100000 | 0.055120 | 0.955120 |
| Kansas | 0.000000 | 0.018572 | 0.981428 |
| Kentucky | 0.013699 | 0.014771 | 0.998927 |
| Louisiana | 0.000000 | 0.002500 | 0.997500 |
| Maine | 0.000000 | 0.013078 | 0.986922 |
| Maryland | 0.000000 | 0.021386 | 0.978614 |
| Massachusetts | 0.033419 | 0.033657 | 0.999763 |
| Michigan | 0.013825 | 0.021604 | 0.992220 |
| Minnesota | 0.000000 | 0.030746 | 0.969254 |
| Mississippi | 0.111111 | 0.048990 | 0.937879 |
| Missouri | 0.010526 | 0.012809 | 0.997718 |
| Montana | 0.027027 | 0.000000 | 0.972973 |
| Nebraska | 0.000000 | 0.027177 | 0.972823 |
| Nevada | 0.045455 | 0.015617 | 0.970162 |
| New Hampshire | 0.062500 | 0.048537 | 0.986037 |
| New Jersey | 0.029382 | 0.021666 | 0.992284 |
| New Mexico | 0.000000 | 0.021558 | 0.978442 |
| New York | 0.009804 | 0.013942 | 0.995861 |
| North Carolina | 0.016736 | 0.010333 | 0.993596 |
| North Dakota | 0.055556 | 0.011749 | 0.956194 |
| Ohio | 0.017857 | 0.011713 | 0.993855 |
| Oklahoma | 0.000000 | 0.017184 | 0.982816 |
| Oregon | 0.039773 | 0.008745 | 0.968973 |
| Pennsylvania | 0.048780 | 0.038109 | 0.989329 |
| Rhode Island | 0.000000 | 0.018803 | 0.981197 |
| South Carolina | 0.000000 | 0.005007 | 0.994993 |
| South Dakota | 0.000000 | 0.004545 | 0.995455 |
| Tennessee | 0.000000 | 0.008233 | 0.991767 |
| Texas | 0.012605 | 0.021408 | 0.991197 |
| Utah | 0.000000 | 0.015070 | 0.984930 |
| Vermont | 0.000000 | 0.035049 | 0.964951 |
| Virginia | 0.023810 | 0.022477 | 0.998668 |
| Washington | 0.044199 | 0.033506 | 0.989307 |
| West Virginia | 0.018868 | 0.019253 | 0.999614 |
$p(\theta|y) \propto p(y|\theta)\,p(\theta)$ (unnormalized posterior: likelihood times prior)
# Unnormalized posterior P(M|O): the likelihood proxy P(O|M) multiplied by
# the prior, for which the 2009-2018 baseline rate is used.
df4['P(M|O)'] = df4['P(O|M)'].mul(df4['Model_2009-2018'])
df4.head(50)
| Observed_2019 | Model_2009-2018 | P(O|M) | P(M|O) | |
|---|---|---|---|---|
| STATE_NAME | ||||
| Alabama | 0.000000 | 0.012316 | 0.987684 | 0.012164 |
| Alaska | 0.000000 | 0.025758 | 0.974242 | 0.025094 |
| Arizona | 0.004785 | 0.018107 | 0.986677 | 0.017866 |
| Arkansas | 0.029412 | 0.009146 | 0.979734 | 0.008960 |
| California | 0.042157 | 0.028152 | 0.985995 | 0.027758 |
| Colorado | 0.027650 | 0.016700 | 0.989050 | 0.016517 |
| Connecticut | 0.023256 | 0.031444 | 0.991812 | 0.031186 |
| Delaware | 0.000000 | 0.030093 | 0.969907 | 0.029188 |
| District of Columbia | 0.004505 | 0.010958 | 0.993546 | 0.010887 |
| Federal | 0.000000 | 0.000000 | 1.000000 | 0.000000 |
| Florida | 0.017544 | 0.011173 | 0.993629 | 0.011101 |
| Georgia | 0.028037 | 0.027123 | 0.999086 | 0.027098 |
| Hawaii | 0.048780 | 0.136364 | 0.912417 | 0.124420 |
| Idaho | 0.083333 | 0.012256 | 0.928923 | 0.011385 |
| Illinois | 0.015385 | 0.017934 | 0.997451 | 0.017888 |
| Indiana | 0.000000 | 0.026156 | 0.973844 | 0.025472 |
| Iowa | 0.100000 | 0.055120 | 0.955120 | 0.052647 |
| Kansas | 0.000000 | 0.018572 | 0.981428 | 0.018227 |
| Kentucky | 0.013699 | 0.014771 | 0.998927 | 0.014755 |
| Louisiana | 0.000000 | 0.002500 | 0.997500 | 0.002494 |
| Maine | 0.000000 | 0.013078 | 0.986922 | 0.012907 |
| Maryland | 0.000000 | 0.021386 | 0.978614 | 0.020928 |
| Massachusetts | 0.033419 | 0.033657 | 0.999763 | 0.033649 |
| Michigan | 0.013825 | 0.021604 | 0.992220 | 0.021436 |
| Minnesota | 0.000000 | 0.030746 | 0.969254 | 0.029800 |
| Mississippi | 0.111111 | 0.048990 | 0.937879 | 0.045947 |
| Missouri | 0.010526 | 0.012809 | 0.997718 | 0.012779 |
| Montana | 0.027027 | 0.000000 | 0.972973 | 0.000000 |
| Nebraska | 0.000000 | 0.027177 | 0.972823 | 0.026438 |
| Nevada | 0.045455 | 0.015617 | 0.970162 | 0.015151 |
| New Hampshire | 0.062500 | 0.048537 | 0.986037 | 0.047859 |
| New Jersey | 0.029382 | 0.021666 | 0.992284 | 0.021499 |
| New Mexico | 0.000000 | 0.021558 | 0.978442 | 0.021093 |
| New York | 0.009804 | 0.013942 | 0.995861 | 0.013885 |
| North Carolina | 0.016736 | 0.010333 | 0.993596 | 0.010266 |
| North Dakota | 0.055556 | 0.011749 | 0.956194 | 0.011234 |
| Ohio | 0.017857 | 0.011713 | 0.993855 | 0.011641 |
| Oklahoma | 0.000000 | 0.017184 | 0.982816 | 0.016889 |
| Oregon | 0.039773 | 0.008745 | 0.968973 | 0.008474 |
| Pennsylvania | 0.048780 | 0.038109 | 0.989329 | 0.037702 |
| Rhode Island | 0.000000 | 0.018803 | 0.981197 | 0.018450 |
| South Carolina | 0.000000 | 0.005007 | 0.994993 | 0.004982 |
| South Dakota | 0.000000 | 0.004545 | 0.995455 | 0.004525 |
| Tennessee | 0.000000 | 0.008233 | 0.991767 | 0.008166 |
| Texas | 0.012605 | 0.021408 | 0.991197 | 0.021220 |
| Utah | 0.000000 | 0.015070 | 0.984930 | 0.014843 |
| Vermont | 0.000000 | 0.035049 | 0.964951 | 0.033821 |
| Virginia | 0.023810 | 0.022477 | 0.998668 | 0.022447 |
| Washington | 0.044199 | 0.033506 | 0.989307 | 0.033148 |
| West Virginia | 0.018868 | 0.019253 | 0.999614 | 0.019246 |
from scipy.stats import entropy
import numpy as np
# Signed surprise per state: posterior * log(posterior / prior), a KL-style
# term, multiplied by the sign of (baseline - observed).
prior = df4['Model_2009-2018']
posterior = df4['P(M|O)']
# A prior of 0 makes the posterior 0 as well, so the ratio is 0/0 and the
# result is NaN for that state.
kl_term = posterior * np.log(posterior / prior)
# posterior / prior equals P(O|M) <= 1, so the log is <= 0; multiplying by
# sign(baseline - observed) therefore yields a positive value when the
# observed rate exceeds the baseline and a negative value when it is below.
direction = np.sign(prior - df4['Observed_2019'])
df4['Signed_Surprise'] = kl_term * direction
df4.head(80)
| Observed_2019 | Model_2009-2018 | P(O|M) | P(M|O) | Signed_Surprise | |
|---|---|---|---|---|---|
| STATE_NAME | |||||
| Alabama | 0.000000 | 0.012316 | 0.987684 | 0.012164 | -0.000151 |
| Alaska | 0.000000 | 0.025758 | 0.974242 | 0.025094 | -0.000655 |
| Arizona | 0.004785 | 0.018107 | 0.986677 | 0.017866 | -0.000240 |
| Arkansas | 0.029412 | 0.009146 | 0.979734 | 0.008960 | 0.000183 |
| California | 0.042157 | 0.028152 | 0.985995 | 0.027758 | 0.000391 |
| Colorado | 0.027650 | 0.016700 | 0.989050 | 0.016517 | 0.000182 |
| Connecticut | 0.023256 | 0.031444 | 0.991812 | 0.031186 | -0.000256 |
| Delaware | 0.000000 | 0.030093 | 0.969907 | 0.029188 | -0.000892 |
| District of Columbia | 0.004505 | 0.010958 | 0.993546 | 0.010887 | -0.000070 |
| Federal | 0.000000 | 0.000000 | 1.000000 | 0.000000 | NaN |
| Florida | 0.017544 | 0.011173 | 0.993629 | 0.011101 | 0.000071 |
| Georgia | 0.028037 | 0.027123 | 0.999086 | 0.027098 | 0.000025 |
| Hawaii | 0.048780 | 0.136364 | 0.912417 | 0.124420 | -0.011404 |
| Idaho | 0.083333 | 0.012256 | 0.928923 | 0.011385 | 0.000839 |
| Illinois | 0.015385 | 0.017934 | 0.997451 | 0.017888 | -0.000046 |
| Indiana | 0.000000 | 0.026156 | 0.973844 | 0.025472 | -0.000675 |
| Iowa | 0.100000 | 0.055120 | 0.955120 | 0.052647 | 0.002417 |
| Kansas | 0.000000 | 0.018572 | 0.981428 | 0.018227 | -0.000342 |
| Kentucky | 0.013699 | 0.014771 | 0.998927 | 0.014755 | -0.000016 |
| Louisiana | 0.000000 | 0.002500 | 0.997500 | 0.002494 | -0.000006 |
| Maine | 0.000000 | 0.013078 | 0.986922 | 0.012907 | -0.000170 |
| Maryland | 0.000000 | 0.021386 | 0.978614 | 0.020928 | -0.000452 |
| Massachusetts | 0.033419 | 0.033657 | 0.999763 | 0.033649 | -0.000008 |
| Michigan | 0.013825 | 0.021604 | 0.992220 | 0.021436 | -0.000167 |
| Minnesota | 0.000000 | 0.030746 | 0.969254 | 0.029800 | -0.000931 |
| Mississippi | 0.111111 | 0.048990 | 0.937879 | 0.045947 | 0.002947 |
| Missouri | 0.010526 | 0.012809 | 0.997718 | 0.012779 | -0.000029 |
| Montana | 0.027027 | 0.000000 | 0.972973 | 0.000000 | NaN |
| Nebraska | 0.000000 | 0.027177 | 0.972823 | 0.026438 | -0.000728 |
| Nevada | 0.045455 | 0.015617 | 0.970162 | 0.015151 | 0.000459 |
| New Hampshire | 0.062500 | 0.048537 | 0.986037 | 0.047859 | 0.000673 |
| New Jersey | 0.029382 | 0.021666 | 0.992284 | 0.021499 | 0.000167 |
| New Mexico | 0.000000 | 0.021558 | 0.978442 | 0.021093 | -0.000460 |
| New York | 0.009804 | 0.013942 | 0.995861 | 0.013885 | -0.000058 |
| North Carolina | 0.016736 | 0.010333 | 0.993596 | 0.010266 | 0.000066 |
| North Dakota | 0.055556 | 0.011749 | 0.956194 | 0.011234 | 0.000503 |
| Ohio | 0.017857 | 0.011713 | 0.993855 | 0.011641 | 0.000072 |
| Oklahoma | 0.000000 | 0.017184 | 0.982816 | 0.016889 | -0.000293 |
| Oregon | 0.039773 | 0.008745 | 0.968973 | 0.008474 | 0.000267 |
| Pennsylvania | 0.048780 | 0.038109 | 0.989329 | 0.037702 | 0.000404 |
| Rhode Island | 0.000000 | 0.018803 | 0.981197 | 0.018450 | -0.000350 |
| South Carolina | 0.000000 | 0.005007 | 0.994993 | 0.004982 | -0.000025 |
| South Dakota | 0.000000 | 0.004545 | 0.995455 | 0.004525 | -0.000021 |
| Tennessee | 0.000000 | 0.008233 | 0.991767 | 0.008166 | -0.000068 |
| Texas | 0.012605 | 0.021408 | 0.991197 | 0.021220 | -0.000188 |
| Utah | 0.000000 | 0.015070 | 0.984930 | 0.014843 | -0.000225 |
| Vermont | 0.000000 | 0.035049 | 0.964951 | 0.033821 | -0.001207 |
| Virginia | 0.023810 | 0.022477 | 0.998668 | 0.022447 | 0.000030 |
| Washington | 0.044199 | 0.033506 | 0.989307 | 0.033148 | 0.000356 |
| West Virginia | 0.018868 | 0.019253 | 0.999614 | 0.019246 | -0.000007 |
| Wisconsin | 0.000000 | 0.027947 | 0.972053 | 0.027166 | -0.000770 |
| Wyoming | 0.000000 | 0.000000 | 1.000000 | 0.000000 | NaN |
# Treat undefined surprise (NaN) as "no surprise" so those states still get
# a numeric value when coloured on the map.
df4 = df4.fillna({'Signed_Surprise': 0})
# Expose the STATE_NAME index as a regular column; the choropleth calls
# reference it through locations='State_Name'.
df4 = df4.assign(State_Name=df4.index)
df4.head(80)
| Observed_2019 | Model_2009-2018 | P(O|M) | P(M|O) | Signed_Surprise | State_Name | |
|---|---|---|---|---|---|---|
| STATE_NAME | ||||||
| Alabama | 0.000000 | 0.012316 | 0.987684 | 0.012164 | -0.000151 | Alabama |
| Alaska | 0.000000 | 0.025758 | 0.974242 | 0.025094 | -0.000655 | Alaska |
| Arizona | 0.004785 | 0.018107 | 0.986677 | 0.017866 | -0.000240 | Arizona |
| Arkansas | 0.029412 | 0.009146 | 0.979734 | 0.008960 | 0.000183 | Arkansas |
| California | 0.042157 | 0.028152 | 0.985995 | 0.027758 | 0.000391 | California |
| Colorado | 0.027650 | 0.016700 | 0.989050 | 0.016517 | 0.000182 | Colorado |
| Connecticut | 0.023256 | 0.031444 | 0.991812 | 0.031186 | -0.000256 | Connecticut |
| Delaware | 0.000000 | 0.030093 | 0.969907 | 0.029188 | -0.000892 | Delaware |
| District of Columbia | 0.004505 | 0.010958 | 0.993546 | 0.010887 | -0.000070 | District of Columbia |
| Federal | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | Federal |
| Florida | 0.017544 | 0.011173 | 0.993629 | 0.011101 | 0.000071 | Florida |
| Georgia | 0.028037 | 0.027123 | 0.999086 | 0.027098 | 0.000025 | Georgia |
| Hawaii | 0.048780 | 0.136364 | 0.912417 | 0.124420 | -0.011404 | Hawaii |
| Idaho | 0.083333 | 0.012256 | 0.928923 | 0.011385 | 0.000839 | Idaho |
| Illinois | 0.015385 | 0.017934 | 0.997451 | 0.017888 | -0.000046 | Illinois |
| Indiana | 0.000000 | 0.026156 | 0.973844 | 0.025472 | -0.000675 | Indiana |
| Iowa | 0.100000 | 0.055120 | 0.955120 | 0.052647 | 0.002417 | Iowa |
| Kansas | 0.000000 | 0.018572 | 0.981428 | 0.018227 | -0.000342 | Kansas |
| Kentucky | 0.013699 | 0.014771 | 0.998927 | 0.014755 | -0.000016 | Kentucky |
| Louisiana | 0.000000 | 0.002500 | 0.997500 | 0.002494 | -0.000006 | Louisiana |
| Maine | 0.000000 | 0.013078 | 0.986922 | 0.012907 | -0.000170 | Maine |
| Maryland | 0.000000 | 0.021386 | 0.978614 | 0.020928 | -0.000452 | Maryland |
| Massachusetts | 0.033419 | 0.033657 | 0.999763 | 0.033649 | -0.000008 | Massachusetts |
| Michigan | 0.013825 | 0.021604 | 0.992220 | 0.021436 | -0.000167 | Michigan |
| Minnesota | 0.000000 | 0.030746 | 0.969254 | 0.029800 | -0.000931 | Minnesota |
| Mississippi | 0.111111 | 0.048990 | 0.937879 | 0.045947 | 0.002947 | Mississippi |
| Missouri | 0.010526 | 0.012809 | 0.997718 | 0.012779 | -0.000029 | Missouri |
| Montana | 0.027027 | 0.000000 | 0.972973 | 0.000000 | 0.000000 | Montana |
| Nebraska | 0.000000 | 0.027177 | 0.972823 | 0.026438 | -0.000728 | Nebraska |
| Nevada | 0.045455 | 0.015617 | 0.970162 | 0.015151 | 0.000459 | Nevada |
| New Hampshire | 0.062500 | 0.048537 | 0.986037 | 0.047859 | 0.000673 | New Hampshire |
| New Jersey | 0.029382 | 0.021666 | 0.992284 | 0.021499 | 0.000167 | New Jersey |
| New Mexico | 0.000000 | 0.021558 | 0.978442 | 0.021093 | -0.000460 | New Mexico |
| New York | 0.009804 | 0.013942 | 0.995861 | 0.013885 | -0.000058 | New York |
| North Carolina | 0.016736 | 0.010333 | 0.993596 | 0.010266 | 0.000066 | North Carolina |
| North Dakota | 0.055556 | 0.011749 | 0.956194 | 0.011234 | 0.000503 | North Dakota |
| Ohio | 0.017857 | 0.011713 | 0.993855 | 0.011641 | 0.000072 | Ohio |
| Oklahoma | 0.000000 | 0.017184 | 0.982816 | 0.016889 | -0.000293 | Oklahoma |
| Oregon | 0.039773 | 0.008745 | 0.968973 | 0.008474 | 0.000267 | Oregon |
| Pennsylvania | 0.048780 | 0.038109 | 0.989329 | 0.037702 | 0.000404 | Pennsylvania |
| Rhode Island | 0.000000 | 0.018803 | 0.981197 | 0.018450 | -0.000350 | Rhode Island |
| South Carolina | 0.000000 | 0.005007 | 0.994993 | 0.004982 | -0.000025 | South Carolina |
| South Dakota | 0.000000 | 0.004545 | 0.995455 | 0.004525 | -0.000021 | South Dakota |
| Tennessee | 0.000000 | 0.008233 | 0.991767 | 0.008166 | -0.000068 | Tennessee |
| Texas | 0.012605 | 0.021408 | 0.991197 | 0.021220 | -0.000188 | Texas |
| Utah | 0.000000 | 0.015070 | 0.984930 | 0.014843 | -0.000225 | Utah |
| Vermont | 0.000000 | 0.035049 | 0.964951 | 0.033821 | -0.001207 | Vermont |
| Virginia | 0.023810 | 0.022477 | 0.998668 | 0.022447 | 0.000030 | Virginia |
| Washington | 0.044199 | 0.033506 | 0.989307 | 0.033148 | 0.000356 | Washington |
| West Virginia | 0.018868 | 0.019253 | 0.999614 | 0.019246 | -0.000007 | West Virginia |
| Wisconsin | 0.000000 | 0.027947 | 0.972053 | 0.027166 | -0.000770 | Wisconsin |
| Wyoming | 0.000000 | 0.000000 | 1.000000 | 0.000000 | 0.000000 | Wyoming |
import plotly.express as px
# Choropleth of the observed 2019 Anti-Asian rate per state. Rows are
# matched to GeoJSON features by comparing the State_Name column with each
# feature's properties.name.
fig0 = px.choropleth(
    df4,
    geojson=states,
    locations='State_Name',
    featureidkey="properties.name",
    color='Observed_2019',
    color_continuous_scale="blues",
    scope="usa",
    # This text used to be passed as labels={...}, which is a set literal;
    # `labels` expects a dict of column name -> display label, so the text
    # never appeared. It reads as a figure title and is passed as one.
    title="USA 2019 Asian Hate Crimes Rate Observed Heat Map",
)
# Keep some top margin so the title is not drawn over the map.
fig0.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
# Choropleth of the signed surprise computed against the 2009-2018 baseline.
# The colour range is fixed and symmetric around 0 on a diverging scale;
# values outside +/-0.003 saturate at the ends of the scale.
fig1 = px.choropleth(
    df4,
    geojson=states,
    locations='State_Name',
    featureidkey="properties.name",
    color='Signed_Surprise',
    color_continuous_scale="rdbu",
    range_color=(-0.003, 0.003),
    scope="usa",
    # This text used to be passed as labels={...}, which is a set literal;
    # `labels` expects a dict of column name -> display label, so the text
    # never appeared. It reads as a figure title and is passed as one.
    title="USA 2019 Asian Hate Crimes Bayesian Surprise Map based on last 10 years",
)
# Keep some top margin so the title is not drawn over the map.
fig1.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
# Same surprise pipeline as for df4, but with a 20-year (1999-2018) baseline.
# NOTE: this rebinds df3, which earlier held the 2009-2018 baseline.
df3 = df2.query('DATA_YEAR>=1999 & DATA_YEAR<=2018').groupby(['STATE_NAME']).agg('mean')

# Observed 2019 rate per state, keeping only the rate column.
df5 = df2.query('DATA_YEAR==2019').groupby(['STATE_NAME']).agg('mean')
df5 = df5.drop(columns=['BIAS_DESC', 'Anti-Asian-count'])
df5['Model_1999-2018'] = df3['Anti-Asian-rate']
df5 = df5.rename(columns={"Anti-Asian-rate": "Observed_2019"})

# Likelihood proxy and unnormalized posterior (likelihood * prior).
# (A statement that assigned the model/observed columns to themselves was
# dropped here; it had no effect.)
df5['P(O|M)'] = 1 - abs(df5['Model_1999-2018'] - df5['Observed_2019'])
df5['P(M|O)'] = df5['P(O|M)'] * df5['Model_1999-2018']

# KL-style term multiplied by sign(baseline - observed). A baseline of 0
# gives 0/0 inside the log and hence NaN, which is replaced by 0 below.
df5['Signed_Surprise'] = df5['P(M|O)']*np.log(df5['P(M|O)']/df5['Model_1999-2018'])*np.sign(df5['Model_1999-2018']-df5['Observed_2019'])
df5['Signed_Surprise'] = df5['Signed_Surprise'].fillna(0)

# Expose the index as a column for the choropleth's `locations` argument.
df5['State_Name'] = df5.index
df5[:80]
# Choropleth of the signed surprise computed against the 1999-2018 baseline,
# drawn with the same fixed colour range as the 10-year map so the two can
# be compared directly.
fig2 = px.choropleth(
    df5,
    geojson=states,
    locations='State_Name',
    featureidkey="properties.name",
    color='Signed_Surprise',
    color_continuous_scale="rdbu",
    range_color=(-0.003, 0.003),
    scope="usa",
    # This text used to be passed as labels={...}, which is a set literal;
    # `labels` expects a dict of column name -> display label, so the text
    # never appeared. It reads as a figure title and is passed as one.
    title="2019 Asian Hate Crimes Bayesian Surprise Map based on last 20 years",
)
# Keep some top margin so the title is not drawn over the map.
fig2.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
# Render the three maps in order: observed rate, 10-year surprise,
# 20-year surprise.
for figure in (fig0, fig1, fig2):
    figure.show()